/**********************************************************************
DO FILE SUMMARY

This file appends and cleans SIGSA data from the Ministry of Health for the K pilot.

**********************************************************************/

clear
cls
version 18
set more off
capture log close

* Set graph options and scheme
set scheme s1color
graph set window fontface "Arial"

* Set directory 
cd "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/"

* Use the macro $S_DATE to save files with current date
global date : di %tdCCYY.NN.DD date("$S_DATE","DMY")
log using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Log files/log_sigsa_${date}.log", replace

/*******************************************************************************
1. Import datasets shared by digitadores and save as dta files
*******************************************************************************/

/*Note on the shape of the data:

SIGSA comes to us in an extreme long format. Specifically, each medication and diagnostic code are separate rows. For example, a patient who receives two medication's at the same clinical visit will therefore have two rows in the data. 

There are 2 two wide reshaping formats. Different outcomes will require different shapes.

1st wide format: Each row is a distinct clinical visit for a given patient. In this reshaping format, patient will have multiple rows corresponding to different visit dates. Outcomes in this shape:

	Treatment rates
	Control rates (assuming glycemic and BP data can be merged in)

2nd wide format: Each row is a distinct patient, and columns will reflect different visit dates as well as subcategories of medication and diagnostic codes within those visit dates. Outcomes in this shape:

	Number of target health districts that met enrollment goals
	Proportion of patient participants with subsequent follow-up visit within 3 months */

// * The SIGSA data does not come year-delinieated (only month and day). Therefore, we must import each year separately and addend a year variable.
//
// * 2023 data from Esquipulas
// import excel "SIGSA P-ANO/2023/ESQUIPULAS/HIPERTENSOS Y DIABETICOS 2023 (FORMATO CORRECTO).xlsx", sheet("Sigsa3_PersonalSalud(89)") firstrow cellrange(A3:AK115100) clear
// gen year = 2023
// save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Esquipulas 2023.dta", replace
//
// * 2024 data from Esquipulas through May 31, 2024
// import excel "SIGSA P-ANO/2024/ESQUIPULAS/PRODUCCION ENERO - MAYO 2024 DISTRITO.xlsx", sheet("Sigsa3_PersonalSalud(82)") firstrow cellrange(A3:AK46844) clear
// gen year = 2024
// save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Esquipulas 2024.dta", replace
//
// * 2023 data from Solola	
// import excel "SIGSA P-ANO/2023/SOLOLA/DIABETES - 3 DISTRITOS 2023.xlsx", sheet("Sigsa3_PersonalSalud (2)") firstrow cellrange(A3:AK75701) clear
// gen year = 2023
// save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Solola 2023.dta", replace
//
// * 2024 data from Solola through May 25, 2024
// import excel "SIGSA P-ANO/2024/SOLOLA/Hipertension y Diabetes - Santa Cruz 2024.xlsx", sheet("Sigsa3_PersonalSalud (33)") firstrow cellrange(A3:AK39581) clear // note that this row is the last for 2024; there are December dates from the prior year that are below this row.
// gen year = 2024
// save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Solola 2024.dta", replace

/*******************************************************************************
2. Append files
*******************************************************************************/

use "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Esquipulas 2023.dta", clear

append using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Esquipulas 2024.dta"

append using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Solola 2023.dta"

append using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/SIGSA Solola 2024.dta"

/*******************************************************************************
3. Clean dataset
*******************************************************************************/

compress

keep Area Distrito Servicio PersonalSalud FechadelaConsulta NombredelPaciente Sexo Pueblo ComunidadLingüistica DescripcióndeDiagnosticoContr CódigoCIE10 DescripciónMedicamento PresentaciónMedicamento CantidaddeMedicamento year EdadenAños EdadenDías EdadenMeses Comunidad

rename Area area
rename Distrito distrito
rename Servicio health_facility
rename PersonalSalud name_healthworker
rename FechadelaConsulta visit_date_raw
rename NombredelPaciente name_patient
rename Sexo sex
rename Pueblo ethnic_group
rename EdadenAños age_years
rename EdadenMeses age_months
rename EdadenDías age_days
rename Comunidad community
rename ComunidadLingüistica language
rename DescripciónMedicamento med
rename PresentaciónMedicamento dose
rename CantidaddeMedicamento quantity_med_dispensed
rename DescripcióndeDiagnosticoContr diagnosis
rename CódigoCIE10 icd

* Encoding
gen female = .
replace female = 0 if sex == "M"
replace female = 1 if sex == "F"
drop sex

encode language, gen(language2)
drop language
rename language2 language

encode ethnic_group, gen(ethnic_group2)
drop ethnic_group
rename ethnic_group2 ethnic_group 

* Eligibility is adults 18+ years of age
drop if age_years <18 
		
* Correcting date of patient visit from imported data -------------------------
	format visit_date_raw %td

	* Extract the month and day from the original date variable
	gen month = month(visit_date_raw)
	gen day = day(visit_date_raw)

	* Combine the new year variable with the extracted month and day
	gen visit_date = mdy(month, day, year)
	drop month day visit_date_raw

	* Convert the string-formatted new date into a daily date format, assuming the year2 is in YYYY format
	format visit_date %td
	gsort + visit_date

	* Generating a month variable which is useful to keep around
	gen mdate = mofd(visit_date)
	tab mdate
	format mdate %tm // best for graphing
	tab mdate if area == "CHIQUIMULA" // 2024 data from Esquipulas through May 31, 2024
	tab mdate if area == "SOLOLÁ" // 2024 data from Solola through May 25, 2024

* Defining where the the project was implemented -------------------------------

	* Esquipulas	
		
		// 	Servicios Chiquimula (son 5): 
		// 	1. Esquipulas (centro de salud)
		// 	2. Chanmagua
		// 	3. Horcones
		// 	4. Timushan
		// 	5. Las Peñas (estos ultimos son puestos de salud)
			
	tab health_facility if area == "CHIQUIMULA"		
			
	gen health_facility_esquipulas = 0
	replace health_facility_esquipulas = 1 if health_facility == "(C/S) ESQUIPULAS"
	replace health_facility_esquipulas = 1 if health_facility == "(P/S) CHANMAGUA"
	replace health_facility_esquipulas = 1 if health_facility == "(P/S) HORCONES"
	replace health_facility_esquipulas = 1 if health_facility == "(P/S) TIMUSHAN"
	replace health_facility_esquipulas = 1 if health_facility == "(P/S) LAS PEÑAS"
		
	* Solola

		// 	Servicios Sololá: All included data

	tab health_facility if area == "SOLOLÁ"
	gen health_facility_solola = 0
	replace health_facility_solola = 1 if area == "SOLOLÁ"

	* Dropping non-intervention districts
	keep if health_facility_esquipulas == 1 | health_facility_solola == 1

* Generating whether medication is a hypertension or diabetes med --------------

	/* This is defined by the following:
	
		Note that the data is in the extreme long format at this stage.
	
		1. Commonly prescribed med in the formulary
		2. Dispensed at least 7 pills
		3. Appropriate associated diagnostic code */

	gen dm_med = 0
	replace dm_med = 1 if ///
		inlist(med,"Metformina","Glimepirida","Glibenclamida") & ///
		inrange(quantity_med_dispensed,7,999) & ///
		(diagnosis == "Diabetes mellitus especificada, con complicaciones no especificadas" | ///
			diagnosis == "Diabetes mellitus no insulinodependiente, con complicaciones múltiples" | ///
			diagnosis == "Diabetes mellitus insulinodependiente" | ///
			diagnosis == "Diabetes mellitus, no especificada, con complicaciones neurológicas" | ///
			diagnosis == "Diabetes mellitus, no especificada, con coma" | ///
			diagnosis == "Diabetes mellitus, no especificada, con cetoacidosis" | ///
			diagnosis == "Diabetes mellitus, no especificada, con complicaciones no especificadas" | ///
			diagnosis == "Diabetes mellitus, no especificada, sin mención de complicación")	
		
	gen htn_med = 0
	replace htn_med = 1 if ///
		inlist(med,"Enalapril","Losartan Potásico","Hidroclorotiazida") & ///
		inrange(quantity_med_dispensed,7,999) & ///
		(diagnosis == "Hipertensión esencial (primaria)" | ///
			diagnosis == "Hipertensión secundaria, no especificada" | ///
			diagnosis == "Hipertensión renovascular" | ///
			diagnosis == "Enfermedad renal hipertensiva sin insuficiencia renal")		

* Generating whether diagnostic code meets enrollment definition ---------------
	gen dm_enroll = 0
	replace dm_enroll = 1 if ///
		(diagnosis == "Diabetes mellitus especificada, con complicaciones no especificadas" | ///
			diagnosis == "Diabetes mellitus no insulinodependiente, con complicaciones múltiples" | ///
			diagnosis == "Diabetes mellitus insulinodependiente" | ///
			diagnosis == "Diabetes mellitus, no especificada, con complicaciones neurológicas" | ///
			diagnosis == "Diabetes mellitus, no especificada, con coma" | ///
			diagnosis == "Diabetes mellitus, no especificada, con cetoacidosis" | ///
			diagnosis == "Diabetes mellitus, no especificada, con complicaciones no especificadas" | ///
			diagnosis == "Diabetes mellitus, no especificada, sin mención de complicación")	
		
	gen htn_enroll = 0
	replace htn_enroll = 1 if ///
		(diagnosis == "Hipertensión esencial (primaria)" | ///
			diagnosis == "Hipertensión secundaria, no especificada" | ///
			diagnosis == "Hipertensión renovascular" | ///
			diagnosis == "Enfermedad renal hipertensiva sin insuficiencia renal")	

	keep if htn_enroll == 1 | dm_enroll == 1 // for our purposes, this is all we care about			
		
* Cleaning up doses from string to numeric format ------------------------------

		// Per ChatGPT:
		// 	1.	Use regexm() to match the pattern of digits followed by "mg" in the dose string.
		// 	2.	Use regexs(0) to extract the matched pattern, which is the numeric portion.
		// 	3.	Convert the extracted string into a numeric value using real() and store it in the dose_mg variable.

	gen dose_mg = real(regexs(0)) if regexm(dose, "[0-9]+")
		
	* Generate inferred daily dose
	gen daily_dose = dose_mg * quantity_med_dispensed / 30

	drop dose
	drop quantity_med_dispensed
	
	merge m:1 name_healthworker using "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Copia de type of health worker in HEARTS dm pilot - 2024-08-22_AW LF2 - DCF edits.dta", keepusing(type) // 
	drop _merge
		
/*******************************************************************************
4. 1st wide format reshaping, cleaning, and analysis: Each row is a distinct clinical visit
*******************************************************************************/	
	
	* Creates within patient-visit med id (note that only htn or dm meds have been kept in the data at this point)
	bysort name_patient age_days age_months age_years health_facility female visit_date: gen med_id=(_n)
		
	* Reshape to 1st wide format (each row is a distinct clinical visit for a given patient)
	reshape wide dose_mg daily_dose med htn_med dm_med dm_enroll htn_enroll icd name_healthworker type diagnosis, i(name_patient age_days age_months age_years health_facility female visit_date ethnic_group language community) j(med_id)

* Defining when the intervention started and ended in Solola -------------------
	
	// 1.1 Sololá: 5 octubre 2023 (23284) - 5 abril 2024 (23471)
		
	* Step 1: Define time zero as October 5, 2023
	local time_zero = mdy(10, 5, 2023)

	* Step 2: Calculate the number of days between each date and time zero
	gen days_since_time_zero_solola = visit_date - `time_zero' if area == "SOLOLÁ"

	* Step 3: Convert days into weeks and create 4-week bins
	gen weeks_since_time_zero_solola = days_since_time_zero_solola / 7
	gen four_week_bins_solola = floor(weeks_since_time_zero_solola / 4)

	tab four_week_bins_solola

	* Step 4: Label the four-week bins if needed
	label define four_week_bins_solola_labels ///
	-12 "-48 to -45 weeks (pre-intervention)" ///
	-11 "-44 to -41 weeks (pre-intervention)" ///
	-10 "-40 to -37 weeks (pre-intervention)" ////
	-9 "-36 to -33 weeks (pre-intervention)" ///
	-8 "-32 to -29 weeks (pre-intervention)" ///
	-7 "-28 to -25 weeks (pre-intervention)" ///
	-6 "-24 to -21 weeks (pre-intervention)" ///
	-5 "-20 to -17 weeks (pre-intervention)" ///
	-4 "-16 to -13 weeks (pre-intervention)" ///
	-3 "-12 to -9 weeks (pre-intervention)" ///
	-2 "-8 to -5 weeks (pre-intervention)" ///
	-1 "-4 to -1 weeks (pre-intervention)" ///
	 0 "0-3 weeks (post-intervention)" ///
	 1 "4-7 weeks (post-intervention)" ///
	 2 "8-11 weeks (post-intervention)" ///
	 3 "12-15 weeks (post-intervention)" ///
	 4 "16-19 weeks (post-intervention)" ///
	 5 "20-23 weeks (post-intervention)" ///
	 6 "24-27 weeks (post-intervention)" ///
	 7 "28-31 weeks (post-intervention)" ///
	 8 "32-35 weeks (post-intervention)" ///
	 9 "36-39 weeks (post-intervention)" ///
	 10 "40-43 weeks (post-intervention)"

	label values four_week_bins_solola four_week_bins_solola_labels

	* Step 5: Clean and inspect the new variable
	tab four_week_bins_solola
	tab days_since_time_zero_solola if four_week_bins_solola == -10 // 25 days in first bin
	tab days_since_time_zero_solola if four_week_bins_solola == 8 // 12 days in last bin
	replace four_week_bins_solola = . if four_week_bins_solola < -9 | four_week_bins_solola > 5
		// Keeping 9 bins pre-implementation data and 6 bins post-implementation data (7 bins of 4 weeks including time == 0 which counts as post-intervention)
	tab four_week_bins_solola

* Defining when the intervention started and ended in Esquipulas -------------------
	
	// * 1.2 Esquipulas: 12 noviembre 2023 (23326) - 12 mayo 2024 (23508)
		
	* Step 1: Define time zero as November 12, 2023
	local time_zero = mdy(11, 12, 2023)

	* Step 2: Calculate the number of days between each date and time zero
	gen days_since_time_zero_esquipulas = visit_date - `time_zero' if area == "CHIQUIMULA"

	* Step 3: Convert days into weeks and create 4-week bins
	gen weeks_since_time_zero_esquipulas = days_since_time_zero_esquipulas / 7
	gen four_week_bins_esquipulas = floor(weeks_since_time_zero_esquipulas / 4)

	tab four_week_bins_esquipulas

	* Step 4: Label the four-week bins if needed
	label define four_week_bins_esquipulas_labels ///
	-12 "-48 to -45 weeks (pre-intervention)" ///
	-11 "-44 to -41 weeks (pre-intervention)" ///
	-10 "-40 to -37 weeks (pre-intervention)" ////
	-9 "-36 to -33 weeks (pre-intervention)" ///
	-8 "-32 to -29 weeks (pre-intervention)" ///
	-7 "-28 to -25 weeks (pre-intervention)" ///
	-6 "-24 to -21 weeks (pre-intervention)" ///
	-5 "-20 to -17 weeks (pre-intervention)" ///
	-4 "-16 to -13 weeks (pre-intervention)" ///
	-3 "-12 to -9 weeks (pre-intervention)" ///
	-2 "-8 to -5 weeks (pre-intervention)" ///
	-1 "-4 to -1 weeks (pre-intervention)" ///
	 0 "0-3 weeks (post-intervention)" ///
	 1 "4-7 weeks (post-intervention)" ///
	 2 "8-11 weeks (post-intervention)" ///
	 3 "12-15 weeks (post-intervention)" ///
	 4 "16-19 weeks (post-intervention)" ///
	 5 "20-23 weeks (post-intervention)" ///
	 6 "24-27 weeks (post-intervention)" ///
	 7 "28-31 weeks (post-intervention)" ///
	 8 "32-35 weeks (post-intervention)" ///
	 9 "36-39 weeks (post-intervention)" ///
	 10 "40-43 weeks (post-intervention)"

	label values four_week_bins_esquipulas four_week_bins_esquipulas_labels

	* Step 5: Clean and inspect the new variable
	tab four_week_bins_esquipulas
	tab days_since_time_zero_esquipulas if four_week_bins_esquipulas == -10 // 25 days in first bin
	tab days_since_time_zero_esquipulas if four_week_bins_esquipulas == 8 // 12 days in last bin
	replace four_week_bins_esquipulas = . if four_week_bins_esquipulas < -9 | four_week_bins_esquipulas > 5
		// Keeping 9 bins pre-implementation data and 6 bins post-implementation data (7 bins of 4 weeks including time == 0 which counts as post-intervention)
	tab four_week_bins_esquipulas
	
	drop days_since_time_zero_solola weeks_since_time_zero_solola days_since_time_zero_esquipulas weeks_since_time_zero_esquipulas
	
	*  KEEPING VISITS 9 MONTHS PRE OR 6 MONTHS POSTS IMPLEMENTATION PERIOD !!!!!!!!!!!!!!!!!!!!
	drop if area == "SOLOLÁ" & four_week_bins_solola == .
	drop if area == "CHIQUIMULA" & four_week_bins_esquipulas == .
	
* Defining the hypertension and diabetes treatment rate variables --------------
	
	* Defining if any htn dm med was prescribed at the visit
	gen htn_med_any = 0
	replace htn_med_any = 1 if ///
		htn_med1 == 1 | ///
		htn_med2 == 1 | ///
		htn_med3 == 1 | ///
		htn_med4 == 1 | ///
		htn_med5 == 1 | ///
		htn_med6 == 1 //

	* Defining if any diabetes med was prescribed at the visit
	gen dm_med_any = 0
	replace dm_med_any = 1 if ///
		dm_med1 == 1 | ///
		dm_med2 == 1 | ///
		dm_med3 == 1 | ///
		dm_med4 == 1 | ///
		dm_med5 == 1 | ///
		dm_med6 == 1 //
		
	* Defining if any sulfylurea was prescribed at the visit
	gen sulfonylurea_flag = 0

		forvalues i = 1/6 {
			replace sulfonylurea_flag = 1 if med`i' == "Glibenclamida" | med`i' == "Glimepirida"
		}
		
	* Defining the hypertension and diabetes # of meds variables -------------------
		
	* # of hypertension meds at visit
		
	gen count_htn_med_visit = 0

		forvalues i = 1/6 { // Adjust the number 3 to the number of htn_med variables you have
			replace count_htn_med_visit = count_htn_med_visit + (inrange(htn_med`i',1,1))
		}

	sum count_htn_med_visit if inrange(count_htn_med_visit,1,6), d

	* # of diabetes meds at visit
		
	gen count_dm_med_visit = 0

		forvalues i = 1/6 { // Adjust the number 3 to the number of htn_med variables you have
			replace count_dm_med_visit = count_dm_med_visit + (inrange(dm_med`i',1,1))
		}

	sum count_dm_med_visit if inrange(count_dm_med_visit,1,6), d

* Marking our htn and meds by visit --------------------------------------------
	
	gen enalapril_flag = 0

		forvalues i = 1/6 {
			replace enalapril_flag = 1 if med`i' == "Enalapril"
		}

	gen losartan_flag = 0

		forvalues i = 1/6 {
			replace losartan_flag = 1 if med`i' == "Losartan Potásico"
		}
		
	gen hctz_flag = 0

		forvalues i = 1/6 {
			replace hctz_flag = 1 if med`i' == "Hidroclorotiazida"
		}	
		
	gen metformin_flag = 0

		forvalues i = 1/6 {
			replace metformin_flag = 1 if med`i' == "Metformina"
		}
		
	gen glibenclamida_flag = 0

		forvalues i = 1/6 {
			replace glibenclamida_flag = 1 if med`i' == "Glibenclamida"
		}
		
	gen glimepirida_flag = 0

		forvalues i = 1/6 {
			replace glimepirida_flag = 1 if med`i' == "Glimepirida"
		}

* Calculate monthly doses ------------------------------------------------------
	gen daily_dose_enalapril = .

		forvalues i = 1/6 {
			replace daily_dose_enalapril = daily_dose`i' if med`i' == "Enalapril"
		}

	gen daily_dose_losartan = .

		forvalues i = 1/6 {
			replace daily_dose_losartan = daily_dose`i' if med`i' == "Losartan Potásico"
		}

	gen daily_dose_hctz = .

		forvalues i = 1/6 {
			replace daily_dose_hctz = daily_dose`i' if med`i' == "Hidroclorotiazida"
		}

	gen daily_dose_metformin = .

		forvalues i = 1/6 {
			replace daily_dose_metformin = daily_dose`i' if med`i' == "Metformina"
		}

	gen daily_dose_glimepiride = .

		forvalues i = 1/6 {
			replace daily_dose_glimepiride = daily_dose`i' if med`i' == "Glimepirida"
		}

	gen daily_dose_glibenclamide = .

		forvalues i = 1/6 {
			replace daily_dose_glibenclamide = daily_dose`i' if med`i' == "Glibenclamida"
		}
		
	* Therapeutic Intensity Score (TIS)

	/* 	See page 12 of Derington for the calculation. It is basically just the sum
		of the medications' fraction of total dose.
		
		Levy PD, et al. J Am Soc Hypertens 2016;10:906-16

		Derington CG, et al. Hypertension 2023;80:590-7
		
		*/
		
	gen tis = 0

		forvalues i = 1/6 {
			replace tis = tis + daily_dose`i'/40 if med`i' == "Enalapril"
			replace tis = tis + daily_dose`i'/100 if med`i' == "Losartan Potásico"
			replace tis = tis + daily_dose`i'/50 if med`i' == "Hidroclorotiazida"
		}

		replace tis = . if tis == 0
		
	* Medication effect score

	/* 	MES is calculated for each diabetes medication in a regimen using the following
		equation: (actual drug dose/maximum drug dose)  drug-specific adjustment factor.
		
		Alexopoulos AS, et al. Chronic Illn 2021;17:451-62.	*/
		
	gen mes = 0

		forvalues i = 1/6 {
			replace mes = mes + daily_dose`i'/2550*1.5 if med`i' == "Metformina"
			replace mes = mes + daily_dose`i'/8*1.5 if med`i' == "Glimepirida"
			replace mes = mes + daily_dose`i'/20*1.5 if med`i' == "Glibenclamida"
		}

		replace mes = . if mes == 0
	
* Generating a unique patient-level id -----------------------------------------
	
	* Calculating date of birth

		/* SIGSA data only gives us the age of the patient. Therefore, we must try to back
		into the patient's date of birth. This is useful because it is essentially the only
		way to uniquely identify patients coming for consultas on different days. This is 
		required to do wide shape #2 as described above. */

	* Calculate total age in days, accounting for leap years and average month length
	generate total_age_days = age_year * 365.25 + age_months * 30.4375 + age_days

	* Subtract total age in days from current_date to get date of birth
	generate dob = visit_date - total_age_days

	* Format the date of birth as a date or as a number
	format dob %td
	// format %9.0g dob // this formats as a number

	* Going row by row and calculating difference in dob from prior row
	bysort name_patient community (dob): gen diff_dob = dob - dob[_n-1] 

		/* When you use the by or bysort command with variables in parentheses, it 
		indicates that the data should be sorted by the variables inside the parentheses
		within each group defined by the preceding variables. Thus you don't need to
		use the abs() function or worry about negative values. */
		
	sum diff_dob, d // note that using just 1.5 days uniquely identify 95% of follow-up visits
		
	* Making a match if the person shares full name, community, and dob within 5 days
		/* This is useful because it will allow us to approximately uniquely identify
		patients for the 2nd wide reshaping below. */
	gen dup = 0
	bysort name_patient community (dob): replace dup = 1 if diff_dob <5

	drop diff_dob

	* Generates a unique patient-level id 
	gen id = .
	replace id = _n if dup == 0 // all dup == 0 are first in a series of patients visits

	* Fill in the missing id numbers 
		// https://journals.sagepub.com/doi/pdf/10.1177/1536867X231196519
	replace id = id[_n - 1] if missing(id) // copying downward missing id

	* Generate the median dob for each patient
	egen dob_median = median(dob), by(id)
	format dob_median %td

	order id name_patient community visit_date dob dob_median dup

	drop dob age_days age_months age_years dup // total_age_days // needed for participant characteristics

* Analysis of hypertension treatment rate --------------------------------------
	
	* Hypertension treatment rate in Solola

	egen tot_treated_htn_solola = total(htn_med_any) if health_facility_solola == 1, by(four_week_bins_solola)
	tab four_week_bins_solola htn_med_any if health_facility_solola == 1

	preserve
	keep if health_facility_solola == 1 
	
	* Generate an id variable within each month
	bysort four_week_bins_solola: gen month_id=(_n) 
	label variable month_id "Within month id"

	* Move up the month to help with interpretation keep the aggregate data
	gen intervention_month = four_week_bins_solola + 1
	
	* Only keep the aggregate data
	keep if month_id == 1
	drop if intervention_month == .
	keep intervention_month tot_treated_htn_solola
	
	* Save
	save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/solola_htn.dta", replace
	
	restore
	
	* Hypertension treatment rate in Esquipulas
	egen tot_treated_htn_esquipulas = total(htn_med_any) if health_facility_esquipulas == 1, by(four_week_bins_esquipulas)
	tab tot_treated_htn_esquipulas htn_med_any if health_facility_esquipulas == 1

	preserve

	keep if health_facility_esquipulas == 1 
	
	* Generate an id variable within each month
	bysort four_week_bins_esquipulas: gen month_id=(_n) 
	label variable month_id "Within month id"
	
	* Move up the month to help with interpretation keep the aggregate data
	gen intervention_month = four_week_bins_esquipulas + 1
	
	* Only keep the aggregate data
	keep if month_id == 1
	drop if intervention_month == .
	keep intervention_month tot_treated_htn_esquipulas
		
	* Merge
	merge 1:1 intervention_month using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/solola_htn.dta"
	drop _merge
	
	* Graph the results
	
	gen tot_treated_htn_both = tot_treated_htn_esquipulas + tot_treated_htn_solola
	list // get the output logged
	
	
	* Single group linear ITSA: http://www.lindenconsulting.org/documents/ITSA_Article.pdf
	tsset intervention_month
	
		* Local macros for text size
		local text_size_small "3.0rs"	
		local text_size_big "3.5rs"	

	* itsa see: Linden A. The Stata Journal 2015;15:480-500.
	itsa tot_treated_htn_both, single trperiod(1)  lag(1) replace posttrend figure( ///
			xsize(6) ysize(4) ///
			graphregion(color(none) lcolor(none) margin(l=3 r=3 b=3 t=5)) ///
			plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
			ytitle("Absolute number treated for hypertension", ///
				margin(l=0 r=0 b=0 t=0) size(`text_size_big') angle(vertical))  	///
			xtitle("Month since implementation", ///
				margin(l=0 r=0 b=0 t=2) size(`text_size_big') angle(horizontal))  	///
			yscale(r(0 250) lcolor(black) titlegap(0)) ///
			ylabel(0 (50) 250, labcolor(black) angle(horizontal) labsize(`text_size_small')) ///
			xscale(r(-9 6) lcolor(black) titlegap(0)) ///
			xlabel(-8 (2) 6, labcolor(black) angle(horizontal) labsize(`text_size_small'))		///
			subtitle("") ///
			note("", ring(0) pos(12) margin(l=0 r=0 b=0 t=0) size(`text_size_big'))   	///
			legend( ///
				size(`text_size_big') ///
				region(color(none) lwidth(none)) ///
				region(fcolor(none) lcolor(none) margin(l=0 r=0 b=0 t=0))) 	///
		 name(itsa_tot_treated_htn_both, replace)) 	 
		 
	gr_edit note.style.editstyle margin(0 0 3.5 0) editcopy
	gr_edit note.style.editstyle drawbox(yes) editcopy
	gr_edit note.style.editstyle fillcolor(white) editcopy
	gr_edit note.style.editstyle linestyle(color(none)) editcopy
	gr_edit plotregion1._xylines[1].z = 0

	actest, lags(12)

	graph save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Figures/itsa_tot_treated_htn_both_${date}.gph", replace         
	graph export "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Figures/itsa_tot_treated_htn_both_${date}.pdf", as(pdf) name(itsa_tot_treated_htn_both) replace
	graph export "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Figures/itsa_tot_treated_htn_both_${date}.png", as(png) name(itsa_tot_treated_htn_both) replace		 
			 
	restore
			 
tot_treated_dm_solola			 
			 
	* Diabetes treatment rate in Solola

	egen tot_treated_dm_solola = total(dm_med_any) if health_facility_solola == 1, by(four_week_bins_solola)
	tab four_week_bins_solola dm_med_any if health_facility_solola == 1

	preserve
	keep if health_facility_solola == 1 
	
	* Generate an id variable within each month
	bysort four_week_bins_solola: gen month_id=(_n) 
	label variable month_id "Within month id"

	* Move up the month to help with interpretation keep the aggregate data
	gen intervention_month = four_week_bins_solola + 1
	
	* Only keep the aggregate data
	keep if month_id == 1
	drop if month_id == .
	keep intervention_month tot_treated_dm_solola
	
	* Save
	save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/solola_dm.dta", replace
	
	restore


	* Diabetes treatment rate in Esquipulas

	egen tot_treated_dm_esquipulas = total(dm_med_any) if health_facility_esquipulas == 1, by(four_week_bins_esquipulas)
	tab four_week_bins_esquipulas dm_med_any if health_facility_esquipulas == 1

	preserve
	keep if health_facility_esquipulas == 1 
	
	* Generate an id variable within each month
	bysort four_week_bins_esquipulas: gen month_id=(_n) 
	label variable month_id "Within month id"
	
	* Move up the month to help with interpretation keep the aggregate data
	gen intervention_month = four_week_bins_esquipulas + 1
	
	* Only keep the aggregate data
	keep if month_id == 1
	drop if month_id == .
	keep intervention_month tot_treated_dm_esquipulas

	* Merge
	merge 1:1 intervention_month using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata temp files/solola_dm.dta"
	
	* Graph the results
	
	gen tot_treated_dm_both = tot_treated_dm_esquipulas + tot_treated_dm_solola
	list // get the output logged
	
	* Single group linear ITSA: http://www.lindenconsulting.org/documents/ITSA_Article.pdf
	tsset intervention_month
	
		* Local macros for text size
		local text_size_small "3.0rs"	
		local text_size_big "3.5rs"	

	* itsa see: Linden A. The Stata Journal 2015;15:480-500.
	itsa tot_treated_dm_both, single trperiod(1)  lag(1) family(poisson) link(identity) replace posttrend figure( ///
			xsize(6) ysize(4) ///
			graphregion(color(none) lcolor(none) margin(l=3 r=3 b=3 t=5)) ///
			plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
			ytitle("Absolute number treated for diabetes", ///
				margin(l=0 r=0 b=0 t=0) size(`text_size_big') angle(vertical))  	///
			xtitle("Month since implementation", ///
				margin(l=0 r=0 b=0 t=2) size(`text_size_big') angle(horizontal))  	///
			yscale(r(0 150) lcolor(black) titlegap(0)) ///
			ylabel(0 (25) 150, labcolor(black) angle(horizontal) labsize(`text_size_small')) ///
			xscale(r(-9 6) lcolor(black) titlegap(0)) ///
			xlabel(-8 (2) 6, labcolor(black) angle(horizontal) labsize(`text_size_small'))		///
			subtitle("") ///
			note("", ring(0) pos(12) margin(l=0 r=0 b=0 t=0) size(`text_size_big'))   	///
			legend( ///
				size(`text_size_big') ///
				region(color(none) lwidth(none)) ///
				region(fcolor(none) lcolor(none) margin(l=0 r=0 b=0 t=0))) 	///
		 name(itsa_tot_treated_dm_both, replace)) 	 
		 
	gr_edit note.style.editstyle margin(0 0 3.5 0) editcopy
	gr_edit note.style.editstyle drawbox(yes) editcopy
	gr_edit note.style.editstyle fillcolor(white) editcopy
	gr_edit note.style.editstyle linestyle(color(none)) editcopy
	gr_edit plotregion1._xylines[1].z = 0

	actest, lags(12)

	graph save "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Figures/itsa_tot_treated_dm_both_${date}.gph", replace         
	graph export "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Figures/itsa_tot_treated_dm_both_${date}.pdf", as(pdf) name(itsa_tot_treated_dm_both) replace
	graph export "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Figures/itsa_tot_treated_dm_both_${date}.png", as(png) name(itsa_tot_treated_dm_both) replace		 
			 
	restore
	
	drop tot_treated_htn_solola tot_treated_htn_esquipulas tot_treated_dm_solola tot_treated_dm_esquipulas

* Calculating total number of visits -------------------------------------------
	
	* ONLY KEEPING INTERVENTION PERIOD VISITS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
	gen intervention_period_visits = 0
	replace intervention_period_visits = 1 if ///
		inrange(four_week_bins_solola,0,6) | ///
		inrange(four_week_bins_esquipulas,0,6)
	drop if intervention_period_visits != 1	
	
	* Defining if any htn code at the visit
	gen htn_enroll_any = 0
	replace htn_enroll_any = 1 if ///
		htn_enroll1 == 1 | ///
		htn_enroll2 == 1 | ///
		htn_enroll3 == 1 | ///
		htn_enroll4 == 1 | ///
		htn_enroll5 == 1 | ///
		htn_enroll6 == 1 //

	* Defining if any diabetes code at the visit
	gen dm_enroll_any = 0
	replace dm_enroll_any = 1 if ///
		dm_enroll1 == 1 | ///
		dm_enroll2 == 1 | ///
		dm_enroll3 == 1 | ///
		dm_enroll4 == 1 | ///
		dm_enroll5 == 1 | ///
		dm_enroll6 == 1 //
		
	* Defining if any htn OR dm code
	gen enroll_any = 0
	replace enroll_any = 1 if ///
		htn_enroll_any == 1 | ///
		dm_enroll_any == 1
	
	* Total visits and by district
	tab enroll_any
	tab health_facility_solola health_facility_esquipulas

	drop enroll_any
	drop dm_enroll1 dm_enroll2 dm_enroll3 dm_enroll4 dm_enroll5 dm_enroll6
	drop htn_enroll1 htn_enroll2 htn_enroll3 htn_enroll4 htn_enroll5 htn_enroll6
	
* Task sharing strategy: Treatment visits with non-physician health worker, %
	drop type2 type3 type4 type5 type6
	rename type1 type
	codebook type
	
	gen type_encoded = .
		replace type_encoded = 0 if type == "Medico" | type == "Médico"
		replace type_encoded = 1 if type == "AE" | type == "EP" | type == "EPS"
	
	tab type_encoded if dm_med_any == 1 | htn_med_any == 1
	tab type_encoded if dm_med_any == 1
	tab type_encoded if htn_med_any == 1

/*******************************************************************************
Saving data to be used in Chiquimula ficha analysis
*******************************************************************************/	
	
preserve

	keep if health_facility_esquipulas == 1	
	
	keep name_patient visit_date health_facility htn_med_any dm_med_any htn_enroll_any dm_enroll_any 
	
	* Clean up health_facility for the potential merge
		gen health_facility2 = .
		
		replace health_facility2 = 11 if health_facility == "(C/S) ESQUIPULAS"
		replace health_facility2 = 12 if health_facility == "(P/S) LAS PEÑAS"
		replace health_facility2 = 13 if health_facility == "(P/S) CHANMAGUA"
		replace health_facility2 = 14 if health_facility == "(P/S) TIMUSHAN"
		replace health_facility2 = 15 if health_facility == "(P/S) HORCONES"
			tab health_facility2, m
		
		drop health_facility
		rename health_facility2 health_facility
		
		gsort + visit_date
	
	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/sigsa chiquimula.dta", replace

	export delimited using /Users/dcflood/Desktop/sigsa.csv, replace
	
restore

/*******************************************************************************
Saving data to be used in Solola DHIS2 analysis
*******************************************************************************/	
	
preserve

// 	. tab health_facility
//
//                   Servicio |      Freq.     Percent        Cum.
// ---------------------------+-----------------------------------
//              (C/S) TZUNUNA |         97       11.04       11.04
// (CAP) SAN MARCOS LA LAGUNA |        149       16.95       27.99
//  (CAP) SAN PABLO LA LAGUNA |        421       47.90       75.88
// (CAP) SANTA CRUZ LA LAGUNA |        177       20.14       96.02
//            (P/S) JAIBALITO |         35        3.98      100.00
// ---------------------------+-----------------------------------
//                      Total |        879      100.00


	keep if health_facility_solola == 1	
	
	keep name_patient visit_date health_facility htn_med_any dm_med_any htn_enroll_any dm_enroll_any 
	
	
	
	* Clean up health_facility for the potential merge
		gen health_facility2 = .
		
		replace health_facility2 = 21 if health_facility == "(CAP) SAN PABLO LA LAGUNA"
		replace health_facility2 = 32 if health_facility == "(CAP) SAN MARCOS LA LAGUNA"
		replace health_facility2 = 43 if health_facility == "(CAP) SANTA CRUZ LA LAGUNA"
		replace health_facility2 = 44 if health_facility == "(P/S) JAIBALITO"
		replace health_facility2 = 45 if health_facility == "(C/S) TZUNUNA"
			tab health_facility2, m
		
		drop health_facility
		rename health_facility2 health_facility
		
		gsort + visit_date
	
	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/sigsa solola.dta", replace
	
restore
	
/*******************************************************************************
5. 2nd wide format reshaping, cleaning, and analysis: Each row is a distinct patient
*******************************************************************************/

* Reshape to 2nd wide format: Each row is a distinct patient, and columns will
* reflect different visit dates as well as subcategories of medication and diagnostic
* codes within those visit dates

* Creates within patient visit number ------------------------------------------

	sort id visit_date
	by id: gen visit_number = _n

	* Create a variable to store the last visit number
	sort id
	egen total_visit_number = max(visit_number), by(id)

drop daily_dose1 daily_dose2 daily_dose3 daily_dose4 daily_dose5 daily_dose6 //


reshape wide ///
	name_healthworker* diagnosis* icd* med* dose_mg*  dm_med* htn_med* visit_date health_facility mdate year total_age_days type ///
		four_week_bins_esquipulas four_week_bins_solola  ///
		sulfonylurea_flag enalapril_flag* losartan_flag* hctz_flag* metformin_flag* glibenclamida_flag* glimepirida_flag* ///
		daily_dose_enalapril daily_dose_losartan daily_dose_hctz daily_dose_metformin daily_dose_glimepiride daily_dose_glibenclamide ///
		htn_enroll_any dm_enroll_any ///
		count_htn_med_visit count_dm_med_visit mes tis type_encoded, ///
	i(id) ///  visit_number_total total_age_days
	j(visit_number)
	
drop total_age_days2 total_age_days3 total_age_days4 total_age_days5 total_age_days6 total_age_days7 total_age_days8 total_age_days9	

	/* The "@" denotes that visit numbers are prefixes, whereas within-visit enumerators
	will be suffixes, e.g., medication # within a visit. */

/*******************************************************************************
6. Enrollment data including required clinicaltrials.gov forms
*******************************************************************************/	

* Total enrollment and by disease based on the first visit
tab htn_enroll_any1 dm_enroll_any1, m

	* Hypertension enrollment per district, n
	di 671/2
	
	* Diabetes enrollment per district, n
	di 397/2
	
tab htn_enroll_any1 dm_enroll_any1 if health_facility_esquipulas == 1, m
tab htn_enroll_any1 dm_enroll_any1 if health_facility_solola == 1, m

* Looking at the proportion of enrolled people by diagnostic code and age are treated
* Total enrollment and by disease based on the first visit
tab htn_enroll_any1 htn_med_any1, m
tab dm_enroll_any1 dm_med_any1, m
	
/*******************************************************************************
7. Table: Participant characteristics
*******************************************************************************/	

* Total
// See above

* Age --------------------------------------------------------------------------

	gen age_years = total_age_days1/365.25
	sum age_years, d // overall
	sum age_years if female == 0
	sum age_years if female == 1

* Sex --------------------------------------------------------------------------

	tab female

* Ethnic group -----------------------------------------------------------------

	tab ethnic_group

		// . label list ethnic_group2
		// ethnic_group2:
		//      1 Garifuna
		//      2 Maya
		//      3 Mestizo-Ladino
		//      4 No Indica
		//      5 Otro
		//      6 Xinca

	gen ethnic_group_table = ethnic_group
	recode ethnic_group_table (5=4)

	label variable ethnic_group_table "Ethnic group"
	recode ethnic_group_table (2=1) (3=2) (4=3)
	label define ethnic_group_table_label 1 "Maya" 2 "Ladino or Mestizo" 3 "Other or not reported"
	label values ethnic_group_table ethnic_group_table_label

	tab ethnic_group_table
	tab ethnic_group_table female, col

* Language ---------------------------------------------------------------------

		// . label list language2
		// language2:
		//      1 Ch'orti'
		//      2 Chalchiteko
		//      3 Itza'
		//      4 Ixil
		//      5 Jakalteka
		//      6 K'iche'
		//      7 Kaqchikel
		//      8 Mam
		//      9 No indica
		//      10 Q'anjob'al
		//      11 Q'eqchi'
		//      12 Sipakapensa
		//      13 Tektiteko
		//      14 Tz'utujil
		//      15 Uspanteka

	gen language_mayan = 0
	replace language_mayan = 1 if inrange(language,1,8) | inrange(language,10,15)

	label variable language_mayan "Speaks Mayan language"

	tab language_mayan
	tab language_mayan female, col

	* Site
	tab area
	tab area female, col

* Location of care health post vs. health center -------------------------------

	gen health_facility_type = .

	replace health_facility_type = 0 if ///
		health_facility1 == "(C/S) ESQUIPULAS" | ///
		health_facility1 == "(C/S) TZUNUNA" | ///
		health_facility1 == "(CAP) SAN MARCOS LA LAGUNA" | ///
		health_facility1 == "(CAP) SAN PABLO LA LAGUNA" | ///
		health_facility1 == "(CAP) SANTA CRUZ LA LAGUNA"

	replace health_facility_type = 1 if ///
		health_facility1 == "(P/S) CHANMAGUA" | ///
		health_facility1 == "(P/S) HORCONES" | ///
		health_facility1 == "(P/S) JAIBALITO" | ///
		health_facility1 == "(P/S) LAS PEÑAS" | ///
		health_facility1 == "(P/S) TIMUSHAN"

	label variable health_facility_type "Health center or health post"	
	label define health_facility_type_label 0 "Health center or CAP" 1 "Health post"	
	label values health_facility_type health_facility_type_label

	tab health_facility_type
	tab health_facility_type female, col

* Type of health worker at initial visit ---------------------------------------
	
	* Generate an id variable within each month
	bysort name_healthworker11: gen name_healthworker11_id=(_n) 
	* list 
	gsort + area + distrito + name_healthworker11 
	* br id visit_date1 health_facility1 distrito area name_healthworker11 if name_healthworker11_id == 1

* Hypertension, diabetes, or both ----------------------------------------------
	gen condition = .
	replace condition = 1 if htn_enroll_any1 == 1 & dm_enroll_any1 != 1 // htn only
	replace condition = 2 if htn_enroll_any1 != 1 & dm_enroll_any1 == 1 // dm only
	replace condition = 3 if htn_enroll_any1 == 1 & dm_enroll_any1 == 1 // htn and dm

	label variable condition "Hypertension, diabetes, or both"	
	label define condition_label 1 "HTN only" 2 "DM only"	3 "Both HTN & DM"
	label values condition condition_label

	tab condition
	tab condition female, col

* Hypertension meds at baseline ------------------------------------------------

	* Mean number of antihypertensive medications
	sum count_htn_med_visit1 if inrange(htn_med_any1,1,10), d

	* Therapeutic Intensity Score,1,2 mean (IQR)
	sum tis1 if inrange(htn_med_any1,1,10), d

	* Enalapril, n (%)
	tab enalapril_flag1 if inrange(htn_med_any1,1,10)

	* Losartan, n (%)
	tab losartan_flag1 if inrange(htn_med_any1,1,10)

	* Hydrochlorothiazide, n (%)
	tab hctz_flag1 if inrange(htn_med_any1,1,10)

	// 	* Enalapril dose/day, mean (mg)
	// 	sum daily_dose_enalapril1 if inrange(htn_med_any1,1,10), d
	//
	// 	* Losartan dose/day, mean (mg)
	// 	sum daily_dose_losartan1 if inrange(htn_med_any1,1,10), d
	//
	// 	* Hydrochlorothiazide dose/day, mean (mg)
	// 	sum daily_dose_hctz1 if inrange(htn_med_any1,1,10), d

* Diabetes meds at baseline ----------------------------------------------------

	* Mean number of glucose-lowering medications
	sum count_dm_med_visit1 if inrange(dm_med_any1,1,10), d

	* Medication Effect Score, mean (SD)
	sum mes1 if inrange(dm_med_any1,1,10), d

	* Metformin, n (%)
	tab metformin_flag1 if inrange(dm_med_any1,1,10)
	
	* Sulfonylurea
	tab sulfonylurea_flag1 if inrange(dm_med_any1,1,10)

	* Glibenclamide, n (%)
	tab glibenclamida_flag1 if inrange(dm_med_any1,1,10)

	* Glimepiride, n (%)
	tab glimepirida_flag1 if inrange(dm_med_any1,1,10)
	
	* ACE/ARB, n (%)
	gen ace_arb = 0
	replace ace_arb = 1 if (enalapril_flag1 == 1 | losartan_flag1 == 1)
	
	tab ace_arb if inrange(dm_med_any1,1,10)

		// * Metformin dose/day, mean (mg)
		// sum daily_dose_metformin1 if inrange(dm_med_any1,1,10), d
		//
		// * Glibenclamide dose/day, mean (mg)
		// sum daily_dose_glimepiride1 if inrange(dm_med_any1,1,10), d
		//
		// * Glimepiride dose/day, mean (mg)
		// sum daily_dose_glibenclamide1 if inrange(dm_med_any1,1,10), d

* Proportion with follow-up visit within 3 months ------------------------------
 
	*** Pre-specified clinicaltrials.gov outcome. *** 
	
	/* 	"Proportion of patients with follow-up visit within 3 months, among those
		enrolled with ≥3 months remaining in implementation period (%)" */
	
	// 	Sololá start: October 5, 2023
	// 	Esquipulas start: November 12, 2023
	
	gen follow_up_3_months = 0 if visit_date1 < (mdy(10, 5, 2023) + 84) & health_facility_solola == 1 // solola
	replace follow_up_3_months = 0 if visit_date1 < (mdy(11, 12, 2023) + 84) & health_facility_esquipulas == 1 // solola
	
	replace follow_up_3_months = 1 if (visit_date2 - visit_date1) <= 84 & follow_up_3_months != .
		
	tab follow_up_3_months
	
	tab follow_up_3_months if dm_enroll_any1 == 0 & htn_enroll_any1 == 1
	tab follow_up_3_months if dm_enroll_any1 == 1 & htn_enroll_any1 == 0
	tab follow_up_3_months if dm_enroll_any1 == 1 & htn_enroll_any1 == 1

* Number of visits per patient, mean -------------------------------------------

	* Create a new variable to count the number of visits
	gen visit_count = 0

	* Loop through each visit date variable and increment the count if the visit is not missing
	foreach var of varlist visit_date1 visit_date2 visit_date3 visit_date4 visit_date5 visit_date6 visit_date7 visit_date8 visit_date9 {
		replace visit_count = visit_count + 1 if !missing(`var')
	}
	
	sum visit_count, d
	
* Adoption: Health facilities enrolling at least one patient with hypertension or diabetes, n (%)

	tab health_facility1, m
	local n_facilities = r(r)
	di `n_facilities'
	
	di `n_facilities'/10

